# -*- coding: utf-8 -*-
# 爬取豆瓣网图书标签信息，存入redis
import requests
from pyquery import PyQuery as pq
import redis

def main():
    """Scrape the Douban book-tag index and push every tag link into Redis.

    Downloads the tag index page, collects the ``href`` of every anchor
    matched by the selector ``.tagCol tr td a`` and pushes them onto the
    Redis list ``book:tag_url`` (127.0.0.1:6379, db 0).  The hrefs are stored
    exactly as they appear in the page; they are not resolved to absolute
    URLs.  Two progress lines are printed to stdout: the number of anchors
    found and the number of links actually saved.

    Raises:
        requests.RequestException: if the page cannot be fetched, the request
            times out, or the server answers with an HTTP error status.
        redis.RedisError: if the links cannot be written to Redis.
    """
    # Fetch the Douban book-tag index page.
    url = "https://book.douban.com/tag/?view=type&icn=index-sorttags-all"
    # NOTE(review): Douban reportedly rejects the default python-requests
    # User-Agent, so a browser-like one is sent -- confirm it is still needed.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
        )
    }
    # Without a timeout requests may block forever on a stalled connection.
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as "0 tags".
    res.raise_for_status()
    html = res.content.decode("utf-8")

    # Parse the page and select every book-tag anchor.
    doc = pq(html)
    tag_items = doc(".tagCol tr td a")
    print("图书标签数量:", len(tag_items))

    # Keep only anchors that actually carry a link: a missing href comes back
    # as None, which redis-py refuses to store, and an empty one is useless.
    tag_urls = [
        href
        for href in (a.attr.href for a in tag_items.items())
        if href
    ]

    # Redis connection details.
    link = redis.StrictRedis(host='127.0.0.1', port=6379, db=0)
    # Push all links in one LPUSH: a single round trip, and the values are
    # inserted head-first left to right, i.e. the same final order as pushing
    # them one by one.  LPUSH needs at least one value, hence the guard.
    if tag_urls:
        link.lpush("book:tag_url", *tag_urls)
    print("保存图书标签数量：", len(tag_urls))

    
# Run the crawler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()